Getting Started

# loading the required packages
library(ggplot2)
library(ggmap)
library(geosphere)

Get Locations

# creating a sample data.frame with your lat/lon points
cities = c("Atlanta", "Boston", "Chicago", "Denver", 
           "Houston", "Los Angeles", "New Orleans", 
           "New York", "Pittisburgh", "Salt Lake City", "San Francisco", "Seattle")

locations = geocode(cities)

# getting the map
map <- get_map(location = "Kansas", zoom = 4,
                      maptype = "satellite", scale = 2)

Plot Locations on the Map

lat = locations$lat 
lon = locations$lon

df <- as.data.frame(cbind(cities,lon,lat))

df$lon = as.numeric(as.character(df$lon))
df$lat = as.numeric(as.character(df$lat))
# plotting the map with some points on it
p <- ggmap(map)+ 
     geom_point(data=df[,c(2,3)], 
                aes(x=lon, y=lat), size=2)
p

Converting Distances between Cities into Binary Matrix

# Get distances between each city
distances = distm(locations, fun = distHaversine)

# Convert meters to mile
meterToMile = 0.000621371
colnames(distances) = cities
rownames(distances) = cities

# Convert matrix meters to mile. 
distancesMiles = distances*meterToMile

distancesMilesBinary = distancesMiles <= 750 # All hubs within 750 miles of each other. 

# Make sure each row has at least 1 integer. 
as.vector(rowSums(distancesMilesBinary) != 0)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Constraint Matrix
distancesMilesBinary
##                Atlanta Boston Chicago Denver Houston Los Angeles
## Atlanta           TRUE  FALSE    TRUE  FALSE    TRUE       FALSE
## Boston           FALSE   TRUE   FALSE  FALSE   FALSE       FALSE
## Chicago           TRUE  FALSE    TRUE  FALSE   FALSE       FALSE
## Denver           FALSE  FALSE   FALSE   TRUE   FALSE       FALSE
## Houston           TRUE  FALSE   FALSE  FALSE    TRUE       FALSE
## Los Angeles      FALSE  FALSE   FALSE  FALSE   FALSE        TRUE
## New Orleans       TRUE  FALSE   FALSE  FALSE    TRUE       FALSE
## New York          TRUE   TRUE    TRUE  FALSE   FALSE       FALSE
## Pittisburgh       TRUE   TRUE    TRUE  FALSE   FALSE       FALSE
## Salt Lake City   FALSE  FALSE   FALSE   TRUE   FALSE        TRUE
## San Francisco    FALSE  FALSE   FALSE  FALSE   FALSE        TRUE
## Seattle          FALSE  FALSE   FALSE  FALSE   FALSE       FALSE
##                New Orleans New York Pittisburgh Salt Lake City
## Atlanta               TRUE     TRUE        TRUE          FALSE
## Boston               FALSE     TRUE        TRUE          FALSE
## Chicago              FALSE     TRUE        TRUE          FALSE
## Denver               FALSE    FALSE       FALSE           TRUE
## Houston               TRUE    FALSE       FALSE          FALSE
## Los Angeles          FALSE    FALSE       FALSE           TRUE
## New Orleans           TRUE    FALSE       FALSE          FALSE
## New York             FALSE     TRUE        TRUE          FALSE
## Pittisburgh          FALSE     TRUE        TRUE          FALSE
## Salt Lake City       FALSE    FALSE       FALSE           TRUE
## San Francisco        FALSE    FALSE       FALSE           TRUE
## Seattle              FALSE    FALSE       FALSE           TRUE
##                San Francisco Seattle
## Atlanta                FALSE   FALSE
## Boston                 FALSE   FALSE
## Chicago                FALSE   FALSE
## Denver                 FALSE   FALSE
## Houston                FALSE   FALSE
## Los Angeles             TRUE   FALSE
## New Orleans            FALSE   FALSE
## New York               FALSE   FALSE
## Pittisburgh            FALSE   FALSE
## Salt Lake City          TRUE    TRUE
## San Francisco           TRUE    TRUE
## Seattle                 TRUE    TRUE

Solving the LP

The minimum number of hub locations to cover all cities is 3.

library(lpSolve)
# solving in R
c <- rep(1, 12)  # vector to minimize
A = distancesMilesBinary # binary matrix representing distances between cities < 750 miles
dir <- rep(">=", 12)  # inequality vector 
b <- rep(1, 12) # vector indicating right side of matrix 

Let’s talk about how this works. Each city must be represented once. TRUE is a binary indicator for 1. So for the first row, Atlanta must be represented by either itself, Chicago, Houston, New Orleans, New Orleans, New York, or Pittisburgh.

cbind(A, dir, b)[1:3,]
##         Atlanta Boston  Chicago Denver  Houston Los Angeles New Orleans
## Atlanta "TRUE"  "FALSE" "TRUE"  "FALSE" "TRUE"  "FALSE"     "TRUE"     
## Boston  "FALSE" "TRUE"  "FALSE" "FALSE" "FALSE" "FALSE"     "FALSE"    
## Chicago "TRUE"  "FALSE" "TRUE"  "FALSE" "FALSE" "FALSE"     "FALSE"    
##         New York Pittisburgh Salt Lake City San Francisco Seattle dir  b  
## Atlanta "TRUE"   "TRUE"      "FALSE"        "FALSE"       "FALSE" ">=" "1"
## Boston  "TRUE"   "TRUE"      "FALSE"        "FALSE"       "FALSE" ">=" "1"
## Chicago "TRUE"   "TRUE"      "FALSE"        "FALSE"       "FALSE" ">=" "1"
s <- lp("min", c, A, dir, b, all.int = TRUE)
s$status
## [1] 0
s$solution
##  [1] 1 0 0 0 0 0 0 1 0 1 0 0
s$objval
## [1] 3
sol = s$solution
names(sol) = cities
sol
##        Atlanta         Boston        Chicago         Denver        Houston 
##              1              0              0              0              0 
##    Los Angeles    New Orleans       New York    Pittisburgh Salt Lake City 
##              0              0              1              0              1 
##  San Francisco        Seattle 
##              0              0

So, we’ll put a hub in Atlanta New York, and Salt Lake City. This minimizes the distance to all other locations.

p <- ggmap(map)+ 
     geom_point(data=df[,c(2,3)], 
                aes(x=lon, y=lat), size=2) +
     geom_point(data=df[TRUE ==as.vector(sol),], cex = 4, colour = "light blue")
p